library(Rcpp)
cppFunction('
#include <numeric>
int gcd(int a, int b) {
return std::gcd(a, b);
}')
cppFunction('
#include <numeric>
int lcm(int a, int b) {
return std::lcm(a, b);
}
')STATS 506 Problem Set #5
OOP Programming
- Here’s the inclusion of gcd and lcm method using Rcpp
Here’s the class definition and class methods.
setClass('rational',
slots = list(numerator = 'integer',
denominator = 'integer'))
##' Constructor
##' @param numerator An integer value
##' @param denominator An none zero integer value
##' @return
##' @export
rational <- function(numerator = 0, denominator = 1) {
if (!is.numeric(numerator) | !is.numeric(denominator)) {
stop('both numerator and denominator must be integer')
}
if (numerator != as.integer(numerator) |
denominator !=as.integer(denominator)) {
stop('both numerator and denominator cannot be floating point')
}
return(new('rational', numerator = as.integer(numerator),
denominator = as.integer(denominator)))
}
setValidity('rational', function(object){
if (object@denominator == 0L) {
stop('denominator cannot be 0')
}
return(TRUE)
})Class "rational" [in ".GlobalEnv"]
Slots:
Name: numerator denominator
Class: integer integer
setMethod('show', 'rational',
function(object) {
cat(object@numerator, '/', object@denominator, '\n')
return(invisible(object))
})
setGeneric('simplify', function(object) standardGeneric('simplify'))[1] "simplify"
setMethod('simplify', 'rational', function(object) {
# compute the unsigned greatest common divisor
divisor <- gcd(abs(object@numerator), abs(object@denominator))
# simplify the fraction and sign
new_num <- object@numerator / divisor
new_den <- object@denominator / divisor
if (new_num < 0 & new_den < 0) {
new_num <- abs(new_num)
new_den <- abs(new_den)
}
return(rational(new_num, new_den))
})
setGeneric('quotient', function(object, digits = 1) standardGeneric('quotient'))[1] "quotient"
setMethod('quotient', 'rational', function(object, digits = 1) {
# round() will floor the digits if it's not integer
if (!is.numeric(digits) | digits < 0) {
stop('digit must be none negative real number')
}
quotient_value <- object@numerator / object@denominator
print(round(quotient_value, digits))
return(invisible(quotient_value))
})
setMethod('+', signature(e1 = 'rational', e2 = 'rational'), function(e1, e2) {
denom <- lcm(e1@denominator, e2@denominator)
numer <- (e1@numerator * (denom / e1@denominator)) +
(e2@numerator * (denom / e2@denominator))
return(simplify(rational(numer, denom)))
})
setMethod('-', signature(e1 = 'rational', e2 = 'rational'), function(e1, e2) {
denom <- lcm(e1@denominator, e2@denominator)
numer <- (e1@numerator * (denom / e1@denominator)) -
(e2@numerator * (denom / e2@denominator))
return(simplify(rational(numer, denom)))
})
setMethod('*', signature(e1 = 'rational', e2 = 'rational'), function(e1, e2) {
numer <- e1@numerator * e2@numerator
denom <- e1@denominator * e2@denominator
return(simplify(rational(numer, denom)))
})
setMethod('/', signature(e1 = 'rational', e2 = 'rational'), function(e1, e2) {
numer <- e1@numerator * e2@denominator
denom <- e1@denominator * e2@numerator
if (denom == 0L) stop('division by zero')
return(simplify(rational(numer, denom)))
})- Here’s the demonstration
r1 <- rational(24, 6)
r2 <- rational(7, 230)
r3 <- rational(0, 4)
r124 / 6
r30 / 4
r1 + r2927 / 230
r1 - r2913 / 230
r1 * r214 / 115
r1 / r2920 / 7
r1 + r34 / 1
r1 * r30 / 1
r2 / r3Error in r2/r3: division by zero
quotient(r1)[1] 4
quotient(r2)[1] 0
quotient(r2, digits = 3)[1] 0.03
quotient(r2, digits = 3.14)[1] 0.03
quotient(r2, digits = 'avocado')Error in quotient(r2, digits = "avocado"): digit must be none negative real number
q2 <- quotient(r2, digits = 3)[1] 0.03
q2[1] 0.03043478
quotient(r3)[1] 0
simplify(r1)4 / 1
simplify(r2)7 / 230
simplify(r3)0 / 1
- Here’s the checking of malformed input to constructor
rational(1, 0)Error in validityMethod(object): denominator cannot be 0
rational(3.14, 1)Error in rational(3.14, 1): both numerator and denominator cannot be floating point
rational(1, 3.14)Error in rational(1, 3.14): both numerator and denominator cannot be floating point
rational('A', 1)Error in rational("A", 1): both numerator and denominator must be integer
rational(1, 'A')Error in rational(1, "A"): both numerator and denominator must be integer
Plotly
- Here’s the recreation using plotly
library(tibble)
library(dplyr)
library(reshape2)
library(plotly)
# read & proces data (same as Pset4)
df <- as_tibble(read.csv('./df_for_ml_improved_new_market.csv'))
genre_columns <- grep('Genre___', colnames(df), value = TRUE)
genre_data <- df[, c('id', 'year', 'price_usd', genre_columns)] %>%
melt(id.vars = c('id', 'year', 'price_usd')) %>%
filter(value == 1) %>%
mutate(genre=sub('Genre___', '', variable)) %>%
select(id, year, price_usd, genre)
# Ex: If the genre is both 'Other' and 'Painting', the final genre should be
# 'Painting'
genre_priority <- c('Photography', 'Print', 'Sculpture', 'Painting', 'Others')
genre_data$genre <- factor(genre_data$genre, levels = genre_priority)
df <- genre_data[!duplicated(genre_data$id), 2:4]
genre_data <- df %>%
count(year, genre) %>%
group_by(year) %>%
mutate(percent = n / sum(n))
plot_ly(data = genre_data, x = ~percent, y = ~year, color = ~genre,
type = 'bar', orientation = 'h') %>%
layout(
barmode = 'stack',
title = 'Genre Distribution Over Years',
xaxis = list(title = 'Percentage'),
yaxis = list(
title = 'Year',
tickmode = 'array',
tickvals = unique(genre_data$year),
ticktext = unique(genre_data$year),
categoryorder = 'trace'
),
legend = list(title = list(text = 'Genre'))
)- Here’s the interactive plot
plot_data <- df %>%
group_by(year, genre) %>%
summarize(avg_price_usd = mean(price_usd, na.rm = TRUE)) %>%
ungroup()
traces <- list()
# overall trend
overall_trace <- plot_data %>%
group_by(year) %>%
summarize(avg_price_usd = mean(avg_price_usd, na.rm = TRUE))
traces[['Overall']] <- list(
x = overall_trace$year,
y = overall_trace$avg_price_usd,
type = 'scatter',
mode = 'lines+markers',
name = 'Overall'
)
# genre-specific trend
genres <- unique(plot_data$genre)
for (genre in genres) {
genre_trace <- plot_data %>% filter(genre == !!genre)
traces[[genre]] <- list(
x = genre_trace$year,
y = genre_trace$avg_price_usd,
type = 'scatter',
mode = 'lines+markers',
name = genre
)
}
fig <- plot_ly()
for (trace_name in names(traces)) {
fig <- fig %>%
add_trace(
x = traces[[trace_name]]$x,
y = traces[[trace_name]]$y,
type = traces[[trace_name]]$type,
mode = traces[[trace_name]]$mode,
name = trace_name,
visible = ifelse(trace_name == 'Overall', TRUE, FALSE)
)
}
# drop down menu
fig <- fig %>%
layout(
title = 'Change in Sales Price Over Time by Genre',
xaxis = list(title = 'Year'),
yaxis = list(title = 'Average Price (USD)'),
updatemenus = list(list(
buttons = lapply(names(traces), function(genre) {
list(
method = 'update',
# only show selected button serie
args = list(list(visible = sapply(names(traces), function(x) x == genre))),
label = genre
)}),
direction = 'down',
x = 0.08,
y = 1.1
)))
figdata.table
# prepare data
library(nycflights13)
library(data.table)
data(flights); data(airports)
setDT(flights)
setDT(airports)- first table
# summary statistics
table1 <- flights[, .(
mean_delay = mean(dep_delay, na.rm = TRUE),
median_delay = median(dep_delay, na.rm = TRUE),
n_flights = .N
), by = origin][n_flights >= 10]
# join with origin for no match
table1 <- merge(table1[, faa := origin], airports,
by = "faa", all.x = TRUE)
table1[, .(name, mean_delay, median_delay)][order(-mean_delay)] name mean_delay median_delay
<char> <num> <num>
1: Newark Liberty Intl 15.10795 -1
2: John F Kennedy Intl 12.11216 -1
3: La Guardia 10.34688 -3
second table
# summary statistics
table2 <- flights[, .(
mean_delay = mean(arr_delay, na.rm = TRUE),
median_delay = median(arr_delay, na.rm = TRUE),
n_flights = .N
), by = dest][n_flights >= 10]
table2 <- merge(table2[, faa := dest], airports,
by = "faa", all.x = TRUE)
table2 <- table2[, name := ifelse(is.na(name), dest, name)]
table2 <- table2[, .(name, mean_delay, median_delay)][order(-mean_delay)]
print(table2, nrows = 102) name mean_delay median_delay
<char> <num> <num>
1: Columbia Metropolitan 41.76415094 28.0
2: Tulsa Intl 33.65986395 14.0
3: Will Rogers World 30.61904762 16.0
4: Jackson Hole Airport 28.09523810 15.0
5: Mc Ghee Tyson 24.06920415 2.0
6: Dane Co Rgnl Truax Fld 20.19604317 1.0
7: Richmond Intl 20.11125320 1.0
8: Akron Canton Regional Airport 19.69833729 3.0
9: Des Moines Intl 19.00573614 0.0
10: Gerald R Ford Intl 18.18956044 1.0
11: Birmingham Intl 16.87732342 -2.0
12: Theodore Francis Green State 16.23463687 1.0
13: Greenville-Spartanburg International 15.93544304 -0.5
14: Cincinnati Northern Kentucky Intl 15.36456376 -3.0
15: Savannah Hilton Head Intl 15.12950601 -1.0
16: Manchester Regional Airport 14.78755365 -3.0
17: Eppley Afld 14.69889841 -2.0
18: Yeager 14.67164179 -1.5
19: Kansas City Intl 14.51405836 0.0
20: Albany Intl 14.39712919 -4.0
21: General Mitchell Intl 14.16722038 0.0
22: Piedmont Triad 14.11260054 -2.0
23: Washington Dulles Intl 13.86420212 -3.0
24: Cherry Capital Airport 12.96842105 -10.0
25: James M Cox Dayton Intl 12.68048606 -3.0
26: Louisville International Airport 12.66938406 -2.0
27: Chicago Midway Intl 12.36422360 -1.0
28: Sacramento Intl 12.10992908 4.0
29: Jacksonville Intl 11.84483416 -2.0
30: Nashville Intl 11.81245891 -2.0
31: Portland Intl Jetport 11.66040210 -4.0
32: Greater Rochester Intl 11.56064461 -5.0
33: Hartsfield Jackson Atlanta Intl 11.30011285 -1.0
34: Lambert St Louis Intl 11.07846451 -3.0
35: Norfolk Intl 10.94909344 -4.0
36: Baltimore Washington Intl 10.72673385 -5.0
37: Memphis Intl 10.64531435 -2.5
38: Port Columbus Intl 10.60132291 -3.0
39: Charleston Afb Intl 10.59296847 -4.0
40: Philadelphia Intl 10.12719014 -3.0
41: Raleigh Durham Intl 10.05238095 -3.0
42: Indianapolis Intl 9.94043412 -3.0
43: Charlottesville-Albemarle 9.50000000 -5.0
44: Cleveland Hopkins Intl 9.18161129 -5.0
45: Ronald Reagan Washington Natl 9.06695204 -2.0
46: Burlington Intl 8.95099602 -4.0
47: Buffalo Niagara Intl 8.94595186 -5.0
48: Syracuse Hancock Intl 8.90392501 -5.0
49: Denver Intl 8.60650021 -2.0
50: Palm Beach Intl 8.56297210 -3.0
51: BQN 8.24549550 -1.0
52: Bob Hope 8.17567568 -3.0
53: Fort Lauderdale Hollywood Intl 8.08212154 -3.0
54: Bangor Intl 8.02793296 -9.0
55: Asheville Regional Airport 8.00383142 -1.0
56: PSE 7.87150838 0.0
57: Pittsburgh Intl 7.68099053 -5.0
58: Gallatin Field 7.60000000 -2.0
59: NW Arkansas Regional 7.46572581 -2.0
60: Tampa Intl 7.40852503 -4.0
61: Charlotte Douglas Intl 7.36031885 -3.0
62: Minneapolis St Paul Intl 7.27016886 -5.0
63: William P Hobby 7.17618819 -4.0
64: Bradley Intl 7.04854369 -10.0
65: San Antonio Intl 6.94537178 -9.0
66: South Bend Rgnl 6.50000000 -3.5
67: Louis Armstrong New Orleans Intl 6.49017497 -6.0
68: Key West Intl 6.35294118 7.0
69: Eagle Co Rgnl 6.30434783 -4.0
70: Austin Bergstrom Intl 6.01990875 -5.0
71: Chicago Ohare Intl 5.87661475 -8.0
72: Orlando Intl 5.45464309 -5.0
73: Detroit Metro Wayne Co 5.42996346 -7.0
74: Portland Intl 5.14157973 -5.0
75: Nantucket Mem 4.85227273 -3.0
76: Wilmington Intl 4.63551402 -7.0
77: Myrtle Beach Intl 4.60344828 -13.0
78: Albuquerque International Sunport 4.38188976 -5.5
79: George Bush Intercontinental 4.24079040 -5.0
80: Norman Y Mineta San Jose Intl 3.44817073 -7.0
81: Southwest Florida Intl 3.23814963 -5.0
82: San Diego Intl 3.13916574 -5.0
83: Sarasota Bradenton Intl 3.08243131 -5.0
84: Metropolitan Oakland Intl 3.07766990 -9.0
85: General Edward Lawrence Logan Intl 2.91439222 -9.0
86: San Francisco Intl 2.67289152 -8.0
87: SJU 2.52052659 -6.0
88: Yampa Valley 2.14285714 2.0
89: Phoenix Sky Harbor Intl 2.09704733 -6.0
90: Montrose Regional Airport 1.78571429 -10.5
91: Los Angeles Intl 0.54711094 -7.0
92: Dallas Fort Worth Intl 0.32212685 -9.0
93: Miami Intl 0.29905978 -9.0
94: Mc Carran Intl 0.25772849 -8.0
95: Salt Lake City Intl 0.17625459 -8.0
96: Long Beach -0.06202723 -10.0
97: Martha\\\\'s Vineyard -0.28571429 -11.0
98: Seattle Tacoma Intl -1.09909910 -11.0
99: Honolulu Intl -1.36519258 -7.0
100: STT -3.83590734 -9.0
101: John Wayne Arpt Orange Co -7.86822660 -11.0
102: Palm Springs Intl -12.72222222 -13.5
name mean_delay median_delay
- Here’s the table
data(planes)
setDT(planes)
table3 <- merge(flights, planes,
by = "tailnum", all.x = TRUE)
table3 <- table3[, mph := 60 * distance / air_time]
table3 <- table3[, .(
avg_mph = mean(mph, na.rm = TRUE),
n_flights = .N
), by = model]
table3[order(-avg_mph)][1] model avg_mph n_flights
<char> <num> <int>
1: 777-222 482.6254 4